/*****************************************************************************
Description:
This program merges the relevant data files to create the base file for each
year and stacks them together to have the base file for all years.
	I. 		Merge in Enrollment
	II. 	Merge in Demo characteristics
	III. 	Merge in baselines
	IV. 	Merge in outcomes
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

args bw

*	Build the stacked propensity-score file for application years 2016-2018.
*	bw is the first positional argument of this do-file; it selects which
*	pscore and running-variable input files are read.

	clear all

	// Pass 1: one temporary file per application year
	forvalues yr = 2016/2018 {
		use "${cleandata}school_level_pscores_`bw'_ms_`yr'.dta", clear

		// attach the running variables; rv_merge records the match status.
		// NOTE(review): rv_merge also matches the rv_* and rv* patterns used
		// further down in this file - confirm that is intended.
		merge 1:1 stu using "${cleandata}runvar_dataset_program_pscore_`bw'_ms_`yr'.dta", generate(rv_merge)

		// merge keys used later: application year and the grade applied for
		generate year_app = `yr'
		generate grade = 6

		tempfile pscores_`yr'
		save "`pscores_`yr''"
	}

	// Pass 2: stack the yearly files in chronological order, starting from
	// an empty dataset
	clear
	forvalues yr = 2016/2018 {
		append using "`pscores_`yr''"
	}

	tempfile pscores
	save "`pscores'"

*	load enrollment file

	use "${cleandata}demo_all_years_ms.dta", clear

	// Number each student's records within a grade in year order, and store
	// how many records the student has in that grade. The count is taken from
	// _N under by: instead of egen max(_n), because the egen documentation
	// warns against explicit subscripts (_n, _N) inside egen expressions.
	bys stu grade (year): gen grade_attempt = _n
	by stu grade: gen last_grade_attempt = _N

	// School of the student's last 5th-grade record.
	gen enr_05 = sch if grade == "05" & grade_attempt == last_grade_attempt
	// Copy that value to every row of the student. The grade restriction
	// below removes all 5th-grade rows, so without this step enr_05 would be
	// empty on every row that survives. Empty strings sort first, so the last
	// row within student holds the non-empty value whenever one exists.
	bys stu (enr_05): replace enr_05 = enr_05[_N]

	//grade restriction: drop the non-numeric grade codes, then keep grades 6-8
	drop if inlist(grade, "0K", "AD", "IN", "PK")
	destring grade, replace
	keep if inrange(grade, 6, 8)

	//year restriction (first 6th grade entry during the sample period)
	bys stu: egen right_cohort = max(grade == 6  &  inrange(year,2017,2019)  &  grade_attempt == 1)
	keep if right_cohort == 1

*	merge in pscores

	// Prepare the merge keys: a numeric student id (presumably stu is numeric
	// in the pscore file - confirm) and the application year, defined as the
	// year before the enrollment year.
	destring stu, replace
	generate year_app = year - 1

	// The pscore file carries grade = 6 only, so a row matches only when the
	// student is enrolled in 6th grade in the year right after applying.
	// Unmatched enrollment rows are kept; pscore-only rows are discarded.
	merge m:1 stu year_app grade using "`pscores'", keep(master match) generate(pscore_merge)

***	outcomes

	// the score files are merged on a string student id
	// NOTE(review): capture hides a failed conversion here; a failure would
	// only show up as a key type mismatch in the merge below.
	cap tostring stu, replace
	* Merge 6th grade scores
	merge m:1 stu using "${cleandata}post_scores_standardized_grade_ms_6.dta", keep(1 3) gen(postsc_merge) keepusing(post_* scale* test_grade_*)
	foreach test in math ela total {
		ren scale_score_`test' post_scale_score_`test'
	}
	// Give the post-test test_grade variables the post_ prefix as well. The
	// baseline merges later in this file request test_grade_ela and
	// test_grade_math by the same names, and merge keeps the master copy of a
	// variable that exists on both sides. Left unprefixed, the post-test
	// values would be the ones renamed to the _5 baseline names.
	rename test_grade_* post_test_grade_*

***	controls

*	demographics
	destring swd ell, replace
	// no generate() option here, so the match status is stored in the
	// default _merge variable
	merge m:1 stu using "${cleandata}demo_info_05th_grade.dta", keep(master match)

*	baseline scores: grade 5 first, then grade 3
	local score_vars raw_score_ela raw_score_math scale_score_ela scale_score_math test_grade_ela test_grade_math bl_ss_math bl_ss_ela bl_total
	foreach g in 5 3 {
		merge m:1 stu using "${cleandata}baseline_scores_standardized_grade_ms_`g'.dta", keep(master match) generate(base_sc_merge_`g') keepusing(`score_vars')
		// tag every score with its grade so the next file can be merged in
		// under the same unsuffixed names
		foreach v of local score_vars {
			rename `v' `v'_`g'
		}
	}
*	enrollment: one 0/1 indicator per distinct non-missing value of sch
	levelsof sch, local(allschools)
	foreach code of local allschools {
		generate enr_`code' = (sch == "`code'")
	}

* 	clean up

	destring stu bl_swd, replace

	//fill in missings: recode missing values to 0 in the enrollment, pscore,
	//offer and running-variable columns
	qui mvencode enr_* pscore_* offer_* rv_*, mv(0) overr

	//drop running variables that are zero on every observation.
	//The check uses min and max rather than the mean, so a variable whose
	//non-zero values happen to average exactly zero is not dropped.
	foreach rv of varlist rv* {
		qui su `rv', meanonly
		if r(min) == 0 & r(max) == 0 {
			drop `rv'
		}
	}

*	rename

	// Grade-5 baseline scores take the bl_ prefix and lose the _5 suffix;
	// the post_ss_ scores become the bare subject names.
	rename (raw_score_ela_5 raw_score_math_5 scale_score_ela_5 scale_score_math_5 test_grade_ela_5 test_grade_math_5 ///
		    post_ss_math post_ss_ela post_ss_total bl_ss_math_5 bl_ss_ela_5 bl_total_5) ///
		   (bl_raw_ela bl_raw_math bl_scale_ela bl_scale_math bl_test_grade_ela bl_test_grade_math ///
		    math ela total bl_math bl_ela bl_total)

	// 5th-grade enrollment gets the geo_ prefix.
	// NOTE(review): the wildcard also catches any school indicator whose
	// code starts with 05 - confirm that is intended.
	rename enr_05* geo_enr_05*

	// every variable name ending in swd now ends in sped
	rename *swd *sped

	// pscore names: remove the md1 token, and replace the md2 and pnp tokens
	// by a 2 or 3 appended to the preceding part of the name
	rename pscore_*_md1_* pscore_*_*
	rename pscore_*_md2_* pscore_*2_*
	rename pscore_*_pnp_* pscore_*3_*

*	restrict to 6th grade rows (outcomes and exposures are already merged on)
	drop if grade != 6
	// remove leftover variables whose names start with two underscores, if any
	capture drop __*

*	write the base file for this bw

	quietly compress
	save "${cleandata}NYCms_basefile_`bw'.dta", replace
